set.seed(3980)
require(spdep)
require(kohonen)
require(RColorBrewer)
require(ggplot2)
require(reshape2)
First step is to read in the data and convert to percentages. Steps:
gl <- read.csv("./Data/GreatLakesAll_500.csv")
sitesf <- as.factor(substr(gl$Sample,0,4))
ages <- gl$YrBP
Extract pollen
poll <- gl[,3:24]
pollSum <- apply(poll,1,sum)
poll <- poll/pollSum*100
poll.s<- sqrt(poll)
Quick boxplot to check values
plot.df = melt(poll.s, variable.name = "Taxa", value.name = "Abundance")
x = ggplot(plot.df, aes(x=Taxa, y=Abundance)) + geom_boxplot()
x = x + ggtitle("All abundance values") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(x)
Start here by building the grid. We use a large grid and then re-cluster (k-means) afterwards.
gridX<-4
gridY<-3
grid.som <-somgrid(gridX, gridY, "hexagonal")
Build SOM, using standard unsupervised mapping, and plot the codebook vectors
poll.som <- som(as.matrix(poll.s), grid = grid.som, rlen=1000)
plot(poll.som, codeRendering="stars")
plot(poll.som, type="changes")
plot(poll.som, type="counts")
plot(poll.som, type="dist.neighbours")
myRCBage = function(n, pal="YlGnBu") {
brewer.pal(n, pal)
}
nnodes = gridX*gridY
nodeAge = rep(NA, nnodes)
for (i in 1:nnodes) {
nodeID = which(poll.som$unit.classif == i)
if (length(nodeID) >0) {
nodeAge[i] = mean(ages[nodeID])
}
}
plot(poll.som, type="property", property=nodeAge/1000, main="Mean node age in ka BP",
palette.name=myRCBage, ncolors=9)
#add.cluster.boundaries(poll.som, poll.som.kmean$cluster)
How do variables map to the SOM?
myRCBpol = function(n, pal="Greens") {
brewer.pal(n, pal)
}
taxNames = names(poll)
for (i in 1:length(taxNames)) {
plot(poll.som, type="property", property=poll.som$codes[[1]][,taxNames[i]], main=taxNames[i],
palette.name=myRCBpol, ncolors=9)
# add.cluster.boundaries(poll.som, som.clus)
}
# allsites = unique(sitesf)
# for (i in 1:length(allsites)) {
# siteID= which(sitesf==allsites[i])
# plot(poll.som, type="dist.neighbours", keepMargins=TRUE, main=allsites[i])
# # add.cluster.boundaries(poll.som, som.clus)
# site.crd <- poll.som$grid$pts[poll.som$unit.classif,][siteID,]
# lines(site.crd, lwd=2)
# points(jitter(site.crd))
# }